# Prepare Data for Supply Model

churn_flag <- FALSE
network_flag <- FALSE

	# Data needed:
		# Pricing factors (IJ * 1)
		# Cost factors (IJ * 1)
		# Risk adjustment factors (J * 1, but probably just make IJ * 1)
		# Base Premiums (F * 1)
		# Base Costs (F * 1)

##### Load Data
	
	setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data")
	mlr_data <- read.csv("mlr_data.csv",header=TRUE,row.names=1)
	rate_data <- read.csv("convergence_analysis_updated.csv",row.names=1)
	
	if(network_flag) {
		rate_data <- read.csv("convergence_analysis_updated_net.csv",row.names=1)
		julia_data <- read.csv(file="ca_julia_data_JUN182021_net.csv",header=TRUE)	
		households <- get(load("ca_household_characteristics_JUN182021_net"))
		plans = read.csv("ca_plan_characteristics_JUN182021_net.csv", header = TRUE) # high-level plans
	} else if(churn_flag) {
		rate_data <- read.csv("convergence_analysis_updated.csv",row.names=1)
		julia_data <- read.csv(file="ca_julia_data_AUG032019_churn.csv",header=TRUE)	
		households <- get(load("ca_household_characteristics_AUG032019_churn"))	
		plans = read.csv("ca_plan_characteristics_AUG032019_churn.csv", header = TRUE) # high-level plans
	} else {
		rate_data <- read.csv("convergence_analysis_updated.csv",row.names=1)
		julia_data <- read.csv(file="ca_julia_data_AUG032019_small.csv",header=TRUE)	
		households <- get(load("ca_household_characteristics_AUG032019_small"))
		plans = read.csv("ca_plan_characteristics_AUG032019_small.csv", header = TRUE) # high-level plans
	}
	
	data <- get(load("ca_enrollment_data_AUG022019")) # "Cleaned" Covered California enrollment data
	data <- data[data$household_year %in% rownames(households),]
	gc()
		
	
	rownames(plans) <- plans$Plan_Name
		
	plan_data <- read.csv("ca_plan_data2.csv") # Covered California plan data by year and rating area
	plan_data$Plan_ID_Order <- as.character(plan_data$Plan_ID_Order)
	rownames(plan_data) <- paste(plan_data$Plan_Name,plan_data$ENROLLMENT_YEAR,
		plan_data$region,sep="")
	plan_data <- plan_data[plan_data$Plan_ID_Order,]
	
	geogdata <- read.csv("ca_cms_geographic_factors.csv",header=TRUE,row.names = 1)
	age_rating_factors <- read.csv("age_rating_factors.csv",row.names=1) # CCIIO default rating curve
		
		
##### Julia demand data

	
	julia_data$plan_name <- as.character(julia_data$plan_name)
	julia_data$AV <- julia_data$av
	julia_data$av <- NULL
	
	plans$Insurer <- as.character(plans$Insurer)
	plans[plans$Insurer == "Western","Insurer"] <- "Small_Insurer"
	
	# Insurer, HMO
	
	insurers <- c("Anthem","Blue_Shield","Kaiser","Health_Net","Small_Insurer")
	network_types <- c("HMO","PPO")
	
	plans$Plan_Name_NOCSR <- plans$Plan_Name
	julia_data$plan_name_nocsr <- julia_data$plan_name
	
	plans <- plans[sort(rownames(plans)),]
	
	plans[plans$AV == 0.55,"AV"] <- 0.6
	plans$HSA <- NULL
	plans$MSP <- NULL
	
	
##### Clean up plan data
	
		plan_data$Insurer <- plan_data$Issuer_Name
		plan_data$Insurer <- as.character(plan_data$Insurer)
		plan_data[plan_data$Insurer == "Anthem Blue Cross","Insurer"] <- "Anthem"
		plan_data[plan_data$Insurer == "Blue Shield","Insurer"] <- "Blue_Shield"
		plan_data[plan_data$Insurer == "Chinese Community","Insurer"] <- "Chinese_Community"
		plan_data[plan_data$Insurer == "Contra Costa Health Plan","Insurer"] <- "Contra_Costa"
		plan_data[plan_data$Insurer == "Health Net","Insurer"] <- "Health_Net"
		plan_data[plan_data$Insurer == "LA Care","Insurer"] <- "LA_Care"
		plan_data[plan_data$Insurer == "Western Health","Insurer"] <- "Western_Health"
	
		plan_data$Year <- plan_data$ENROLLMENT_YEAR
		plan_data$Metal_Level <- plan_data$metal_level
		plan_data$Type <- plan_data$PLAN_NETWORK_TYPE
	
		# Assign an integer to each plan
		
		data$plan_unique_id <- paste(data$plan_name,data$year,data$rating_area,sep="")
		
		plan_names <- sort(unique(plan_data$Plan_Name_Small_NOHSACAT))
		plan_numbers <- 1:length(plan_names)
		names(plan_numbers) <- plan_names
		
		plan_data$Plan_Name_Small_NOHSACAT <- as.character(plan_data$Plan_Name_Small_NOHSACAT)
		data$Plan_Name_Small_NOHSACAT <- plan_data[data$plan_id,"Plan_Name_Small_NOHSACAT"]
			
		data$plan_number <- NA
		data[!is.na(data$plan_id),"plan_number"] <- plan_numbers[data[!is.na(data$Plan_Name_Small_NOHSACAT),"Plan_Name_Small_NOHSACAT"]]
		data[is.na(data$plan_id),"plan_number"] <- max(data$plan_number,na.rm=TRUE) + 1
		plan_data$plan_number <- plan_numbers[plan_data$Plan_Name_Small_NOHSACAT]
	
		rate_data$insurer_year <- rownames(rate_data)
		data$insurer_small <- data$insurer
		data[!data$insurer_small %in% insurers & !is.na(data$insurer),"insurer_small"] <- "Small_Insurer"
		
		# Convert 40-year old premium to 21 year-old premium
		plan_data$Premium <- plan_data$Premium/1.278		
	
	
	
##### Clean up MLR Data		
		
		mlr_data[is.na(mlr_data$HIOS_ISSUER_ID),"HIOS_ISSUER_ID"] <- 99999
		mlr_data$GROUP_AFFILIATION <- as.character(mlr_data$GROUP_AFFILIATION)
		mlr_data[mlr_data$HIOS_ISSUER_ID == 27603,"GROUP_AFFILIATION"] <- "Anthem"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 70285,"GROUP_AFFILIATION"] <- "Blue_Shield"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 47579,"GROUP_AFFILIATION"] <- "Chinese_Community"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 99483,"GROUP_AFFILIATION"] <- "Contra_Costa"
		mlr_data[mlr_data$HIOS_ISSUER_ID %in% c(67138,99110),"GROUP_AFFILIATION"] <- "Health_Net"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 40513,"GROUP_AFFILIATION"] <- "Kaiser"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 92815,"GROUP_AFFILIATION"] <- "LA_Care"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 18126,"GROUP_AFFILIATION"] <- "Molina"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 92499,"GROUP_AFFILIATION"] <- "Sharp"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 84014,"GROUP_AFFILIATION"] <- "Valley"
		mlr_data[mlr_data$HIOS_ISSUER_ID == 93689,"GROUP_AFFILIATION"] <- "Western_Health"
		mlr_data[is.na(mlr_data$HIOS_ISSUER_ID),"GROUP_AFFILIATION"] <- "Small_Insurer"
			
		# Remove small insurers
		mlr_data <- mlr_data[mlr_data$GROUP_AFFILIATION %in% insurers,]
			
		# Combine the two Health Nets		
		for (t in min(mlr_data$year):max(mlr_data$year)) {		
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Member_months"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Member_months"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Member_months"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Premiums"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Premiums"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Premiums"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Claims"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Claims"] +
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Claims"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"ACA_taxes_fees"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"ACA_taxes_fees"] +
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"ACA_taxes_fees"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"FDSTLCL_taxes_fees"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"FDSTLCL_taxes_fees"] +
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"FDSTLCL_taxes_fees"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Admin_costs"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Admin_costs"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Admin_costs"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Var_Admin_Low"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Var_Admin_Low"] +
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Var_Admin_Low"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Var_Admin_High"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Var_Admin_High"] +
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Var_Admin_High"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Fixed_Admin_Low"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Fixed_Admin_Low"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Fixed_Admin_Low"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Fixed_Admin_High"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Fixed_Admin_High"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Fixed_Admin_High"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"MLR_Rebate"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"MLR_Rebate"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"MLR_Rebate"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Risk_adjustment_rec"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Risk_adjustment_rec"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Risk_adjustment_rec"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Reinsurance_rec"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Reinsurance_rec"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Reinsurance_rec"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Risk_corridor_rec"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Risk_corridor_rec"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Risk_corridor_rec"]
			mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Premiums_w3Rs"] <- 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 67138,"Premiums_w3Rs"] + 
				mlr_data[mlr_data$year == t & mlr_data$HIOS_ISSUER_ID == 99110,"Premiums_w3Rs"]
		}
		
		# Remove PPO Health Net
		mlr_data <- mlr_data[!mlr_data$HIOS_ISSUER_ID == 99110,]
		rownames(mlr_data) <- paste(mlr_data$GROUP_AFFILIATION,mlr_data$year,sep="_")	
		
	
##### Pricing, Cost, and Risk Adjustment factors
	
	data$age_cost_factor <- data$geog_cost_factor <- NA
	data[!is.na(data$AGE) & data$year <= 2017,"age_cost_factor"] <- 
		age_rating_factors[as.character(pmin(64,data[!is.na(data$AGE)  & data$year <= 2017,"AGE"])),
			"Rating_Factor"] 
	data[!is.na(data$AGE) & data$year > 2017,"age_cost_factor"] <- 
		age_rating_factors[as.character(pmin(64,data[!is.na(data$AGE)  & data$year > 2017,"AGE"])),
			"Rating_Factor2018"] 
	data[!is.na(data$plan_id),"geog_cost_factor"] <- plan_data[data[!is.na(data$plan_id),"plan_id"],"Plan_Geog_Factor"] 
	data$plan_factor <- plan_data[data$plan_id,"Pricing_Value"]
	data$cost_factor <- data$age_cost_factor * data$geog_cost_factor * data$plan_factor
	
	# Age/Smoking Cost Factor at Household Level
	households$age_tob_cost_factor <- by(data$age_cost_factor,data$household_year,sum)[rownames(households)] 
	gc()
	
	# Risk Adjustment Factor (AV and moral hazard factors from CMS paper on transfer formula)
	silver_plans <- c("Silver","Silver - Enhanced 73","Silver - Enhanced 87","Silver - Enhanced 94","Silver73","Silver87","Silver94")
	plan_data$ra_factor <- 0.6 * 1
	plan_data[plan_data$Metal_Level %in% c("Minimum Coverage","Catastrophic"),"ra_factor"] <- 0.57 * 1 
	plan_data[plan_data$Metal_Level %in% silver_plans,"ra_factor"] <- 0.7 * 1.03 
	plan_data[plan_data$Metal_Level %in% c("Gold"),"ra_factor"] <- 0.8 * 1.08 
	plan_data[plan_data$Metal_Level %in% c("Platinum"),"ra_factor"] <- 0.9 * 1.15 
		
	# Add region and year to julia_data
	julia_data$region <- households[julia_data$household_number,"rating_area"]
	julia_data$year <- households[julia_data$household_number,"year"]
	julia_data$plan_unique_id <- paste(julia_data$plan_name,julia_data$year,julia_data$region,sep="")
	
	# Need plan_name to refer to plan_data object
	
		julia_data$plan_name_ref <- julia_data$plan_name
	
		# Anthem
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "ANT_BR","plan_name_ref"] <- "ANT_BR2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "ANT_BR_HSA","plan_name_ref"] <- "ANT_BR_HSA2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "ANT_CAT","plan_name_ref"] <- "ANT_CAT2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "ANT_G","plan_name_ref"] <- "ANT_G2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "ANT_P","plan_name_ref"] <- "ANT_P2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "ANT_SIL","plan_name_ref"] <- "ANT_SIL2"
		
		# Blue Shield
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "BS_BR","plan_name_ref"] <- "BS_BR2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "BS_BR_HSA","plan_name_ref"] <- "BS_BR2_HSA"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "BS_CAT","plan_name_ref"] <- "BS_CAT2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "BS_G","plan_name_ref"] <- "BS_G2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "BS_P","plan_name_ref"] <- "BS_P2"
		julia_data[!julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "BS_SIL","plan_name_ref"] <- "BS_SIL2"
		
		# Health Net
		epo_regions <- c(2,4,5,8,9,10)
		hsp_regions <- c(15,16,17,18,19)

		julia_data[julia_data$region %in% epo_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_BR","plan_name_ref"] <- "HN_BR2"
		julia_data[julia_data$region %in% epo_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_CAT","plan_name_ref"] <- "HN_CAT2"
		julia_data[julia_data$region %in% epo_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_G","plan_name_ref"] <- "HN_G2"
		julia_data[julia_data$region %in% epo_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_P","plan_name_ref"] <- "HN_P2"
		julia_data[julia_data$region %in% epo_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_SIL","plan_name_ref"] <- "HN_SIL2"
		
		julia_data[julia_data$region %in% hsp_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_BR","plan_name_ref"] <- "HN_BR4"
		julia_data[julia_data$region %in% hsp_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_CAT","plan_name_ref"] <- "HN_CAT4"
		julia_data[julia_data$region %in% hsp_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_G","plan_name_ref"] <- "HN_G4"
		julia_data[julia_data$region %in% hsp_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_P","plan_name_ref"] <- "HN_P4"
		julia_data[julia_data$region %in% hsp_regions & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_SIL","plan_name_ref"] <- "HN_SIL4"
		
		julia_data[julia_data$region %in% c(7,14) & julia_data$year == 2015 & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_BR","plan_name_ref"] <- "HN_BR2"
		julia_data[julia_data$region %in% c(7,14) & julia_data$year == 2015 & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_CAT","plan_name_ref"] <- "HN_CAT2"
		julia_data[julia_data$region %in% c(7,14) & julia_data$year == 2015 & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_G","plan_name_ref"] <- "HN_G2"
		julia_data[julia_data$region %in% c(7,14) & julia_data$year == 2015 & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_P","plan_name_ref"] <- "HN_P2"
		julia_data[julia_data$region %in% c(7,14) & julia_data$year == 2015 & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_SIL","plan_name_ref"] <- "HN_SIL2"
		
		julia_data[julia_data$region %in% c(1,3,7,11) & julia_data$year %in% c(2016,2017) & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_BR","plan_name_ref"] <- "HN_BR4"
		julia_data[julia_data$region %in% c(1,3,7,11) & julia_data$year %in% c(2016,2017) & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_CAT","plan_name_ref"] <- "HN_CAT4"
		julia_data[julia_data$region %in% c(1,3,7,11) & julia_data$year %in% c(2016,2017) & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_G","plan_name_ref"] <- "HN_G4"
		julia_data[julia_data$region %in% c(1,3,7,11) & julia_data$year %in% c(2016,2017) & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_P","plan_name_ref"] <- "HN_P4"
		julia_data[julia_data$region %in% c(1,3,7,11) & julia_data$year %in% c(2016,2017) & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_SIL","plan_name_ref"] <- "HN_SIL4"
		
		julia_data[julia_data$region %in% c(14:19) & julia_data$year %in% c(2016:2019) & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_BR","plan_name_ref"] <- "HN_BR4"
		julia_data[julia_data$region %in% c(14:19) & julia_data$year %in% c(2016:2019) & !julia_data$plan_unique_id %in% rownames(plan_data) & julia_data$plan_name_ref == "HN_CAT","plan_name_ref"] <- "HN_CAT4"
		
		# Small Insurer
		
			plan_data$Plan_Name <- as.character(plan_data$Plan_Name)
			plan_data$Plan_Name_Small <- as.character(plan_data$Plan_Name_Small)
		
			for(n in c(1:19)) {
				for(t in c(2014:2019)) {
					
					# Catastrophic indices
					cat_indices <- which(plan_data$Plan_Name_Small == "SMALL_CAT" & plan_data$ENROLLMENT_YEAR == t & plan_data$region == n)
					if(length(cat_indices) > 0) {
						julia_data[julia_data$region == n & julia_data$year == t & julia_data$plan_name_ref == "SMALL_CAT","plan_name_ref"]  <- 
							plan_data[cat_indices[which.min(plan_data[cat_indices,"Premium"])],"Plan_Name"]
					}
					
					# Bronze indices
					bronze_indices <- which(plan_data$Plan_Name_Small == "SMALL_BR" & plan_data$ENROLLMENT_YEAR == t & plan_data$region == n)
					if(length(bronze_indices) > 0) {
						julia_data[julia_data$region == n & julia_data$year == t & julia_data$plan_name_ref == "SMALL_BR","plan_name_ref"]  <- 
							plan_data[bronze_indices[which.min(plan_data[bronze_indices,"Premium"])],"Plan_Name"]
					}
					
					# Bronze HSA indices
					bronze_hsa_indices <- which(plan_data$Plan_Name_Small == "SMALL_BR_HSA" & plan_data$ENROLLMENT_YEAR == t & plan_data$region == n)
					if(length(bronze_hsa_indices) > 0) {
						julia_data[julia_data$region == n & julia_data$year == t & julia_data$plan_name_ref == "SMALL_BR_HSA","plan_name_ref"]  <- 
							plan_data[bronze_hsa_indices[which.min(plan_data[bronze_hsa_indices,"Premium"])],"Plan_Name"]
					}
					
					# Silver indices
					silver_indices <- which(plan_data$Plan_Name_Small == "SMALL_SIL" & plan_data$ENROLLMENT_YEAR == t & plan_data$region == n)
					if(length(silver_indices) > 0) {
						julia_data[julia_data$region == n & julia_data$year == t & julia_data$plan_name_ref == "SMALL_SIL","plan_name_ref"]  <- 
							plan_data[silver_indices[which.min(plan_data[silver_indices,"Premium"])],"Plan_Name"]
					}
					
					# Gold indices
					gold_indices <- which(plan_data$Plan_Name_Small == "SMALL_G" & plan_data$ENROLLMENT_YEAR == t & plan_data$region == n)
					if(length(gold_indices) > 0) {
						julia_data[julia_data$region == n & julia_data$year == t & julia_data$plan_name_ref == "SMALL_G","plan_name_ref"]  <- 
							plan_data[gold_indices[which.min(plan_data[gold_indices,"Premium"])],"Plan_Name"]
					}
					
					# Platinum indices
					platinum_indices <- which(plan_data$Plan_Name_Small == "SMALL_P" & plan_data$ENROLLMENT_YEAR == t & plan_data$region == n)
					if(length(platinum_indices) > 0) {
						julia_data[julia_data$region == n & julia_data$year == t & julia_data$plan_name_ref == "SMALL_P","plan_name_ref"]  <- 
							plan_data[platinum_indices[which.min(plan_data[platinum_indices,"Premium"])],"Plan_Name"]
					}
				}
			}
		
		
		julia_data$plan_unique_id <- paste(julia_data$plan_name_ref,julia_data$year,julia_data$region,sep="")
		julia_data[julia_data$plan_name_ref == "Uninsured","plan_unique_id"] <- NA
		
	# Add rating factor to julia data
	julia_data$rating_factor <- households[julia_data$household_number,"rating_factor"]
	julia_data$age_factor <- julia_data$rating_factor
	
	# Add age cost factor to julia data
	julia_data$age_tob_cost_factor <- households[julia_data$household_number,"age_tob_cost_factor"]
		
	# Add pricing factor to julia data
	insured_indices <- which(!is.na(julia_data$plan_unique_id))
	julia_data$pricing_factor <- 0
	julia_data[insured_indices,"pricing_factor"] <- 
		plan_data[julia_data[insured_indices,"plan_unique_id"],"Pricing_Factor"] * julia_data[insured_indices,"rating_factor"]
	
	# Add age-geog factors to julia data
	julia_data$age_geog_factor <- 0
	julia_data[insured_indices,"age_geog_factor"] <- 
		plan_data[julia_data[insured_indices,"plan_unique_id"],"Plan_Geog_Factor"] * julia_data[insured_indices,"rating_factor"]
	
	# Add age-geog cost factors to julia data
	julia_data$age_geog_cost_factor <- 0
	julia_data[insured_indices,"age_geog_cost_factor"] <- 
		plan_data[julia_data[insured_indices,"plan_unique_id"],"Plan_Geog_Factor"] * julia_data[insured_indices,"age_tob_cost_factor"]
	
	# Add cost factors to julia data
	julia_data$cost_factor <- 0
	julia_data[insured_indices,"cost_factor"] <- 
		plan_data[julia_data[insured_indices,"plan_unique_id"],"Pricing_Factor"] * julia_data[insured_indices,"age_tob_cost_factor"]
	
	# Add RA factors to julia data
	julia_data$ra_factor <- 0
	julia_data[insured_indices,"ra_factor"] <- 
		plan_data[julia_data[insured_indices,"plan_unique_id"],"ra_factor"]
	
	# Add CMS geographic factor
	julia_data$GCF <- geogdata[paste(households[julia_data$household_number,"rating_area"],
		households[julia_data$household_number,"year"],sep="_"),"GCF"] 
	
	
#### Add base plan premiums to plans object

	plan_data$insurer_small <- plan_data$Insurer
	plan_data[!plan_data$insurer_small %in% insurers,"insurer_small"] <- "Small_Insurer"
		
	plans$Premium_2014 <- plans$Premium_2015 <- plans$Premium_2016 <-
		plans$Premium_2017 <- plans$Premium_2018 <- plans$Premium_2019 <- 0
	
	base_plan_region <- 15
	
	for(t in c(2014:2019)) {
		
		col <- paste("Premium",t,sep="_")
		
		for(f in insurers) {
			plan_indices <- which(plan_data$ENROLLMENT_YEAR == t & 
				plan_data$region == base_plan_region & plan_data$insurer_small == f)
			base_plan_index <- intersect(plan_indices,which(plan_data$Base_Plan == 1))	
			
			# Toss CSR plans
			plan_indices <- setdiff(plan_indices,which(plan_data$metal_level %in% 
				c("Silver - Enhanced 73","Silver - Enhanced 87","Silver - Enhanced 94")))
			
			# Toss HSA plans
			plan_indices <- setdiff(plan_indices,which(plan_data$HSA == 1))
			
			# Toss Catastrophic Plans
			plan_indices <- setdiff(plan_indices,which(plan_data$Metal_Level == "Minimum Coverage"))
			
			# Check if any duplicates - you'll definitely have duplicates for Small insurer, possibly Health Net
			duplicates <- plan_indices[which(duplicated(plan_data[plan_indices,"Plan_Name_Small_NOHSACAT"]))]
			if(length(duplicates) > 0) {
				duplicate_plan_names <- unique(plan_data[duplicates,"Plan_Name_Small_NOHSACAT"])
				for(j in duplicate_plan_names) {
					all_duplicate_indices <- intersect(plan_indices,which(plan_data$Plan_Name_Small_NOHSACAT == j))
					lowest_prem_index <- all_duplicate_indices[which.min(plan_data[all_duplicate_indices,"Premium"])]
					
					# Remove all but the lowest_prem_index
					plan_indices <- setdiff(plan_indices,setdiff(all_duplicate_indices,lowest_prem_index))
				}
			}
			
			# Insert Premium into plans object
			plans[plan_data[plan_indices,"Plan_Name_Small_NOHSACAT"],col] <- plan_data[plan_indices,"Premium"]
			
		}
	}
	
	# A few issues with the above
	
		# Anthem not in 15 in 2018-2019 (use RA7 as base)
		for(t in c(2018:2019)) {
			col <- paste("Premium",t,sep="_")
			plan_indices <- which(plan_data$ENROLLMENT_YEAR == t & 
					plan_data$region == 7 & plan_data$insurer_small == "Anthem")
			base_plan_index <- intersect(plan_indices,which(plan_data$Base_Plan == 1))	
			
			# Toss CSR plans
			csr_plans <- intersect(plan_indices,which(plan_data$metal_level %in% 
				c("Silver - Enhanced 73","Silver - Enhanced 87","Silver - Enhanced 94")))
			plan_indices <- setdiff(plan_indices,csr_plans)
			
			# Insert Premium into plans object
			plans[plan_data[plan_indices,"Plan_Name_Small_NOHSACAT"],col] <- plan_data[plan_indices,"Premium"]
		}		
	
	
		# Health PPO (HN_SIL,HN_G,HN_P) not in 15 in 2014-2017
		for(t in c(2014:2017)) {
		
			col <- paste("Premium",t,sep="_")
			HN_base_premium <- plan_data[which(plan_data$ENROLLMENT_YEAR == t & 
					plan_data$Base_Plan == 1 & plan_data$insurer_small == "Health_Net"),"Premium"]
			
			for(j in c("HN_SIL","HN_G","HN_P")) {
				
				# Pull pricing value from one rating area and multiply by base premium (pricing values are all the same)
				
				plans[j,col] <- 
					plan_data[plan_data$ENROLLMENT_YEAR == t & plan_data$region == 2 & 
					plan_data$insurer_small == "Health_Net" & plan_data$Plan_Name_Small == j,"Pricing_Value"] * HN_base_premium
			}
		}
		
		# Health Net PPO Bronze not in 15 in 2015
		for(t in c(2015)) {
		
			col <- paste("Premium",t,sep="_")
			HN_base_premium <- plan_data[which(plan_data$ENROLLMENT_YEAR == t & 
					plan_data$Base_Plan == 1 & plan_data$insurer_small == "Health_Net"),"Premium"]
			
			for(j in c("HN_BR")) {
				
				# Pull pricing value from one rating area and multiply by base premium (pricing values are all the same)
				
				plans[j,col] <- 
					plan_data[plan_data$ENROLLMENT_YEAR == t & plan_data$region == 2 & 
					plan_data$insurer_small == "Health_Net" & plan_data$Plan_Name_Small == j,"Pricing_Value"] * HN_base_premium
			}
		}
		
		
	# Remove unneeded columns
	plans$Plan_Name_NOCSR <- NULL
	plans$MSP <- NULL
	plans$Premium <- NULL
	plans$Catastrophic <- NULL
	plans$Bronze <- NULL
	plans$Gold <- NULL
	plans$Platinum <- NULL
	
#### Create plans_pt object

	plans_pt <- rbind(plans,plans,plans,plans,plans,plans)
	plans_pt$year <- c(rep(2014,nrow(plans)),rep(2015,nrow(plans)),rep(2016,nrow(plans)),
						rep(2017,nrow(plans)),rep(2018,nrow(plans)),rep(2019,nrow(plans)))
	plans_pt$Premium <- c(plans$Premium_2014,plans$Premium_2015,plans$Premium_2016,
							plans$Premium_2017,plans$Premium_2018,plans$Premium_2019)
	plans_pt$plan_name <- plans$Plan_Name
	plans_pt$pt_name <- paste(plans_pt$plan_name,plans_pt$year,sep="")
	
	plans_pt$reins_factor <- rate_data[paste(plans_pt$Insurer,plans_pt$year,sep="_"),"RI"]/rate_data[paste(plans_pt$Insurer,plans_pt$year,sep="_"),"Claims"]			
	plans_pt[is.na(plans_pt$reins_factor),"reins_factor"] <- 0		
	
	plans_pt$variable_admin <- pmin(mlr_data[paste(plans_pt$Insurer,plans_pt$year,sep="_"),"Var_Admin_Low"]/
		mlr_data[paste(plans_pt$Insurer,plans_pt$year,sep="_"),"Member_months"],65)	
	plans_pt[is.na(plans_pt$variable_admin),"variable_admin"] <- 0		
	
	plans_pt$fixed_admin <- mlr_data[paste(plans_pt$Insurer,plans_pt$year,sep="_"),"Fixed_Admin_Low"]/
		mlr_data[paste(plans_pt$Insurer,plans_pt$year,sep="_"),"Member_months"]
	plans_pt[is.na(plans_pt$fixed_admin),"fixed_admin"] <- 0					
	
	keep_fields <- c("pt_name","plan_name","year","Metal_Level","AV","Silver","HMO","Insurer",
		"Anthem","Blue_Shield","Kaiser","Health_Net","Premium","reins_factor","variable_admin","fixed_admin")
	plans_pt <- plans_pt[,keep_fields]
	plans_pt <- plans_pt[plans_pt$Premium != 0,]
	
#### Create plans_pmt object
	# Fields: pmt_name, plan_name, year, rating_area, AV, HMO, Anthem, Blue_Shield, Kaiser, Health_Net
	
	keep_fields <- c("pmt_name","Plan_Name_Small_NOHSACAT","ENROLLMENT_YEAR","region","Metal_Level","PLAN_NETWORK_TYPE","insurer_small")
	plan_data$pmt_name <- paste(plan_data$Plan_Name_Small_NOHSACAT,plan_data$ENROLLMENT_YEAR,plan_data$region,sep="")
	plans_pmt <- plan_data[!duplicated(plan_data$pmt_name),keep_fields]
	
	plans_pmt$plan_name <- plans_pmt$Plan_Name_Small_NOHSACAT
	plans_pmt$year <- plans_pmt$ENROLLMENT_YEAR
	plans_pmt$rating_area <- plans_pmt$region
	plans_pmt$HMO <- as.numeric(plans_pmt$PLAN_NETWORK_TYPE == "HMO")
	plans_pmt$Anthem <- as.numeric(plans_pmt$insurer_small == "Anthem")
	plans_pmt$Blue_Shield <- as.numeric(plans_pmt$insurer_small == "Blue_Shield")
	plans_pmt$Health_Net <- as.numeric(plans_pmt$insurer_small == "Health_Net")
	plans_pmt$Kaiser <- as.numeric(plans_pmt$insurer_small == "Kaiser")
	plans_pmt$Small_Insurer <- as.numeric(plans_pmt$insurer_small == "Small_Insurer")
	plans_pmt$Insurer <- plans_pmt$insurer_small
	
	plans_pmt$AV <- 0.6
	plans_pmt[plans_pmt$Metal_Level == "Silver","AV"] <- 0.7
	plans_pmt[plans_pmt$Metal_Level == "Gold","AV"] <- 0.8
	plans_pmt[plans_pmt$Metal_Level == "Platinum","AV"] <- 0.9
	
	# Set by CMS (see Pope et al 2014 article on transfer formula)
	plans_pmt$IDF <- 1
	plans_pmt[plans_pmt$Metal_Level == "Silver","IDF"] <- 1.03
	plans_pmt[plans_pmt$Metal_Level == "Gold","IDF"] <- 1.08
	plans_pmt[plans_pmt$Metal_Level == "Platinum","IDF"] <- 1.15
	
	# Read in Geographic Factors set CMS
	plans_pmt$GCF <- geogdata[paste(plans_pmt$rating_area,plans_pmt$year,sep="_"),"GCF"]
	
	# Add proj_name and rate_name
	if(network_flag) {
		rsdata <- read.csv("moments_data_net.csv",header=TRUE,row.names=1)
	else {
		rsdata <- read.csv("moments_data_V11.csv",header=TRUE,row.names=1)
	}
	plans_pmt$rate_name <- plans_pmt$pmt_name	
	plans_pmt[!plans_pmt$rate_name %in% rsdata$pmt_name,"rate_name"]	<- 
		paste(plans_pmt[!plans_pmt$rate_name %in% rsdata$pmt_name,"plan_name"],plans_pmt[!plans_pmt$rate_name %in% rsdata$pmt_name,"year"],0,sep="")
	
		# Fix remaining discrepancy
		plans_pmt[plans_pmt$rate_name == "BS_G320170","rate_name"] <- "BS_G320174" # B/c we combined regions 2 and 4
		
	plans_pmt$proj_name <- rsdata[plans_pmt$rate_name,"proj_name"]
	
	keep_fields <- c("pmt_name","proj_name","rate_name","plan_name","year","rating_area","AV","HMO","IDF","GCF",
		"Anthem","Blue_Shield","Kaiser","Health_Net","Small_Insurer","Insurer")
	plans_pmt <- plans_pmt[,keep_fields]
	
#### Check rating factor in julia data object

	# Let's make sure the rating factor (actually I use age_geog_factor) is correct
	rownames(plans_pt) <- plans_pt$pt_name
	julia_data$pt_name <- paste(julia_data$plan_name,julia_data$year,sep="")
	
	julia_data$base_premium <- NA
	julia_data[julia_data$AV > 0,"base_premium"] <- plans_pt[julia_data[julia_data$AV > 0,"pt_name"],"Premium"]  
	
	julia_data$calculated_premium <- julia_data$premium + julia_data$subsidy + julia_data$penalty
	
	julia_data$age_geog_factor <- julia_data$calculated_premium/julia_data$base_premium
	julia_data[is.na(julia_data$age_geog_factor),"age_geog_factor"] <- 0
	
	julia_data$household_id <- households[julia_data$household_number,"household_id"]
	
#### Format output

	if(network_flag) {
		keep_columns <- c("plan_name","household_number","household_id","choice","previous_choice","new_plan","network_breadth","network_inclus","network_pairinclus",
			"premium","subsidy","penalty","year",
			"weight","uninsured_plan","AV","pricing_factor","age_geog_factor","age_factor","cost_factor","ra_factor","GCF","choice_set_id","residual")
	} else {
		keep_columns <- c("plan_name","household_number","household_id","choice","previous_choice","new_plan","premium","subsidy","penalty","year",
			"weight","uninsured_plan","AV","pricing_factor","age_geog_factor","age_factor","cost_factor","ra_factor","GCF","choice_set_id","residual")
	}
	
	if(network_flag) {
		write.csv(julia_data[,keep_columns],file="ca_julia_supply_data_JUN182021_net.csv",row.names=FALSE)
		write.csv(plans_pt,file="ca_plan_year_JUN182021_net.csv",row.names=FALSE)	
		write.csv(plans_pmt,file="ca_plan_market_year_JUN182021_net.csv",row.names=FALSE)
	} else if(churn_flag) {
		write.csv(julia_data[,keep_columns],file="ca_julia_supply_data_JUN262020_churn.csv",row.names=FALSE)
		write.csv(plans_pt,file="ca_plan_year_JUN262020_churn.csv",row.names=FALSE)	
		write.csv(plans_pmt,file="ca_plan_market_year_JUN262020_churn.csv",row.names=FALSE)
	} else {
		write.csv(julia_data[,keep_columns],file="ca_julia_supply_data_JUN262020_learn.csv",row.names=FALSE)
		write.csv(plans_pt,file="ca_plan_year_JUN262020_learn.csv",row.names=FALSE)	
		write.csv(plans_pmt,file="ca_plan_market_year_JUN262020_learn.csv",row.names=FALSE)
	}
	gc()
